# install.packages("sf")
# install.packages("leaflet")
library(sf)
library(tidyverse)
library(leaflet)
I have been provided these three datasets for this project:
# Load the burglary records from the burglaries_2023 CSV into a tibble.
burglary_incidents <- read_csv(file = "../data/burglaries_2023.csv")
# Auto-print the tibble to inspect its columns and first rows.
burglary_incidents
NA
Take a closer look at the values in the victim ethnicity column:
# List the distinct values (including NA) recorded for victim ethnicity.
burglary_incidents |>
  pull(victim_ethnicity) |>
  unique()
[1] "Non-Hispanic" NA "Hispanic" "Unknown"
# Load the census table from CSV into a tibble.
census <- read_csv(file = "../data/census.csv")
# Auto-print the tibble to inspect its columns and first rows.
census
NA
# Load the Davidson County census-tract polygons from the DC shapefile
# as an sf object, then print it to check geometry type and CRS.
dav_cty_census_tracts <- read_sf(dsn = "../data/DC/DC.shp")
dav_cty_census_tracts
Simple feature collection with 174 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -87.0547 ymin: 35.96778 xmax: -86.51559 ymax: 36.4055
Geodetic CRS: NAD83
# Re-display the raw burglary table for reference.
burglary_incidents

# Outline map of the census tracts.
ggplot(data = dav_cty_census_tracts) +
  geom_sf()

# Same map, with each tract filled by its ALAND value.
ggplot(data = dav_cty_census_tracts, mapping = aes(fill = ALAND)) +
  geom_sf()
Perform a spatial join to determine the census tract in which each burglary occurred. Hint: You may want to make use of the st_as_sf function in order to convert the burglaries data into an sf object.
# Convert the burglary records to sf POINT features so they can later be
# spatially joined to the census tracts.
burglary_incidents

# Rows missing either coordinate cannot be turned into points, so drop them
# first; the points are tagged with the same CRS as the tract polygons.
burglary_incidents_mapped <- burglary_incidents |>
  drop_na(latitude, longitude) |>
  st_as_sf(
    coords = c("longitude", "latitude"),
    crs = st_crs(dav_cty_census_tracts)
  )
burglary_incidents_mapped
Simple feature collection with 1146 features and 27 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -92.51 ymin: 34.15 xmax: -86.557 ymax: 36.34
Geodetic CRS: NAD83
Rename column in Davidson county census tract data so that the merge goes more smoothly.
# The shapefile's NAME column becomes tract_name before the census table
# (which has its own NAME column) is merged in.
dav_cty_census_tracts <- dav_cty_census_tracts |>
  rename(tract_name = NAME)
Merge census csv data with dav_cty_census_tracts DC shape file data.
# Attach the census attributes to the tract polygons, matching the
# shapefile's TRACTCE to the census table's tract column. all = TRUE keeps
# rows from either side that have no match.
census_tracts <- merge(
  x = dav_cty_census_tracts,
  y = census,
  by.x = "TRACTCE",
  by.y = "tract",
  all = TRUE
)
census_tracts
Simple feature collection with 174 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -87.0547 ymin: 35.96778 xmax: -86.51559 ymax: 36.4055
Geodetic CRS: NAD83
First 10 features:
TRACTCE STATEFP COUNTYFP GEOID tract_name NAMELSAD MTFCC FUNCSTAT ALAND AWATER
1 010103 47 037 47037010103 101.03 Census Tract 101.03 G5020 S 48034082 61097
2 010104 47 037 47037010104 101.04 Census Tract 101.04 G5020 S 65057849 251504
3 010105 47 037 47037010105 101.05 Census Tract 101.05 G5020 S 28328799 1093
4 010106 47 037 47037010106 101.06 Census Tract 101.06 G5020 S 21616474 6845
5 010201 47 037 47037010201 102.01 Census Tract 102.01 G5020 S 23718545 0
6 010202 47 037 47037010202 102.02 Census Tract 102.02 G5020 S 68394934 77571
7 010301 47 037 47037010301 103.01 Census Tract 103.01 G5020 S 8527942 11775
8 010302 47 037 47037010302 103.02 Census Tract 103.02 G5020 S 4179336 6813
9 010303 47 037 47037010303 103.03 Census Tract 103.03 G5020 S 4508896 142888
10 010401 47 037 47037010401 104.01 Census Tract 104.01 G5020 S 9543414 320298
INTPTLAT INTPTLON NAME state county population
1 +36.3444054 -086.8608396 Census Tract 101.03, Davidson County, Tennessee 47 037 2411
2 +36.2940028 -086.8777483 Census Tract 101.04, Davidson County, Tennessee 47 037 3002
3 +36.2504208 -086.8521501 Census Tract 101.05, Davidson County, Tennessee 47 037 4839
4 +36.2610013 -086.8023491 Census Tract 101.06, Davidson County, Tennessee 47 037 2948
5 +36.2882537 -086.7728157 Census Tract 102.01, Davidson County, Tennessee 47 037 4283
6 +36.3619781 -086.7746355 Census Tract 102.02, Davidson County, Tennessee 47 037 3919
7 +36.3161492 -086.7261435 Census Tract 103.01, Davidson County, Tennessee 47 037 3914
8 +36.3139482 -086.7125964 Census Tract 103.02, Davidson County, Tennessee 47 037 1589
9 +36.3132279 -086.7006728 Census Tract 103.03, Davidson County, Tennessee 47 037 5114
10 +36.2943965 -086.6864670 Census Tract 104.01, Davidson County, Tennessee 47 037 4734
median_income geometry
1 60000 MULTIPOLYGON (((-86.91752 3...
2 84831 MULTIPOLYGON (((-86.9744 36...
3 61115 MULTIPOLYGON (((-86.89144 3...
4 66940 MULTIPOLYGON (((-86.83089 3...
5 69185 MULTIPOLYGON (((-86.81736 3...
6 81695 MULTIPOLYGON (((-86.82483 3...
7 52806 MULTIPOLYGON (((-86.74132 3...
8 50341 MULTIPOLYGON (((-86.72469 3...
9 46604 MULTIPOLYGON (((-86.71971 3...
10 47025 MULTIPOLYGON (((-86.71149 3...
# Outline map of the merged tract layer.
ggplot(data = census_tracts) +
  geom_sf()

# Keep only the burglary points that st_filter matches against the tract
# polygons.
burglary_incidents_mapped_filtered <- burglary_incidents_mapped |>
  st_filter(census_tracts)

# Overlay the retained burglary points on the tract outlines.
ggplot(data = census_tracts) +
  geom_sf() +
  geom_sf(data = burglary_incidents_mapped_filtered, size = 0.1)
NA
NA
NA
NA
After performing the spatial join, merge in the census data. Note: Make sure that the final dataset contains all census tracts, even those with zero burglaries.
# Spatially join each burglary point to the census tract that contains it.
# left = FALSE requests an inner join, so points that fall within no tract
# are not carried into the result.
burglary_census_combo <- burglary_incidents_mapped |>
  st_join(census_tracts, join = st_within, left = FALSE)
burglary_census_combo
Simple feature collection with 1142 features and 44 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -87.02 ymin: 35.99 xmax: -86.557 ymax: 36.34
Geodetic CRS: NAD83
Perform some exploratory analysis on your prepared dataset.
Classes of the two datasets:
# Show the class vector of the merged tract layer.
class(census_tracts)
[1] "sf" "data.frame"
# Show the class vector of the joined burglary/tract points.
class(burglary_census_combo)
[1] "sf" "tbl_df" "tbl" "data.frame"
Curious as to the highest number of victims in one burglary.
# Show the row(s) whose victim_number equals the overall maximum
# (NA values are ignored when computing that maximum).
filter(
  burglary_census_combo,
  victim_number == max(victim_number, na.rm = TRUE)
)
Simple feature collection with 1 feature and 44 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -86.69 ymin: 36.15 xmax: -86.69 ymax: 36.15
Geodetic CRS: NAD83
Limit the dataset to non-repeated incident numbers by keeping, for each incident number, the row with the highest victim number.
# Reduce the joined data to exactly one row per incident_number: the row with
# the highest victim_number.
#
# Sorting first and then keeping the first row of each incident (rather than
# filtering on victim_number == max(victim_number)) guarantees a single row
# per incident even when several rows tie on victim_number, and keeps
# incidents whose victim_number is entirely NA (arrange() sorts NA last, and
# max(..., na.rm = TRUE) on an all-NA group would return -Inf with a warning
# and drop the incident).
real_num_burglaries <- burglary_census_combo |>
  arrange(desc(victim_number)) |>
  group_by(incident_number) |>
  filter(row_number() == 1L) |>
  # Drop the incident grouping so later steps are not silently grouped.
  ungroup()
real_num_burglaries
Simple feature collection with 894 features and 44 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -87.02 ymin: 35.99 xmax: -86.557 ymax: 36.34
Geodetic CRS: NAD83
Calculate the accurate number of burglaries in each tract.
# Count de-duplicated burglaries per census tract, keeping every tract in
# census_tracts, including those with no burglaries (reported as 0).
#
# Starting from the full list of tract codes and left-joining the counts is
# what keeps the zero-burglary tracts; counting the incident rows alone can
# only produce tracts that have at least one burglary.
burglaries_per_tract_real <- census_tracts |>
  st_drop_geometry() |>
  as_tibble() |>
  distinct(TRACTCE) |>
  left_join(
    real_num_burglaries |>
      st_drop_geometry() |>
      # Clear any grouping carried over from the de-duplication step so the
      # count is by tract only.
      ungroup() |>
      count(TRACTCE, name = "num_burglaries"),
    by = "TRACTCE"
  ) |>
  # Tracts absent from the counts had no burglaries.
  mutate(num_burglaries = replace_na(num_burglaries, 0L)) |>
  arrange(desc(num_burglaries))
burglaries_per_tract_real
NA
For comparison, the per-tract counts computed from the unfiltered data (before removing repeated incident rows):
# Per-tract row counts taken straight from the joined data, i.e. without
# collapsing repeated incident rows; used only as a comparison figure.
burglaries_per_tract <- burglary_census_combo |>
  st_drop_geometry() |>
  group_by(TRACTCE) |>
  # .groups = "keep" leaves the result grouped by TRACTCE.
  summarise(num_burglaries = n(), .groups = "keep") |>
  arrange(desc(num_burglaries))
burglaries_per_tract
NA
Aggregate the data by census tract. Warning: each incident can appear multiple times if there are multiple victims, so be sure that you aren’t double-counting any incidents.
# Display the per-tract burglary counts built from the de-duplicated incidents.
burglaries_per_tract_real
NA